library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(ggplot2)


# Download the chlamydia and AIDS-deaths tables from data.world.
chlamydia <- read.csv(
  "https://query.data.world/s/cu0r246l5ljt4q3411q66hoq",
  header = TRUE
)
aids <- read.csv(
  "https://query.data.world/s/b75zwogm4rndt5lwhkmrfnhwc",
  header = TRUE
)

#chlamydia.backup = chlamydia
#aids.backup = aids

# Convert a count column to numeric *by value*.
#
# Calling as.numeric() directly on a factor returns the internal level codes
# (1, 2, 3, ...), not the numbers the labels spell out, so the column is
# converted through as.character() first. Thousands separators are stripped
# and the "Data not available" placeholder becomes NA. Any other non-numeric
# text still triggers the usual coercion warning so it does not go unnoticed.
#
# x: a numeric, character or factor vector.
# Returns a numeric vector of the same length as x.
parse_count <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  txt <- gsub(",", "", trimws(as.character(x)), fixed = TRUE)
  txt[txt %in% c("", "Data not available")] <- NA_character_
  as.numeric(txt)
}

# NOTE: captured summary() output elsewhere in this file that shows Cases and
# Population running from 1 to roughly the number of rows was produced by the
# old factor-code conversion and no longer reflects these columns.
chlamydia$Cases <- parse_count(chlamydia$Cases)
chlamydia$Population <- parse_count(chlamydia$Population)

aids$Cases <- parse_count(aids$Cases)
aids$Population <- parse_count(aids$Population)


# Per-column overview of the chlamydia table. The "##" lines that follow are
# console output captured from an earlier run.
# NOTE(review): in that output Cases and Population span 1..850 and 1..811 on
# 896 rows, which looks like factor level codes rather than real counts --
# verify how these columns were converted before trusting the numbers.
summary(chlamydia)
##      Indicator        Year               Geography        FIPS      
##  Chlamydia:896   Min.   :2000   Alabama       : 16   Min.   : 1.00  
##                  1st Qu.:2004   Alaska        : 16   1st Qu.:17.75  
##                  Median :2008   American Samoa: 16   Median :31.50  
##                  Mean   :2008   Arizona       : 16   Mean   :32.54  
##                  3rd Qu.:2011   Arkansas      : 16   3rd Qu.:46.25  
##                  Max.   :2015   California    : 16   Max.   :78.00  
##                                 (Other)       :800                  
##                     Race             Sex               Age.group  
##  All races/ethnicities:896   Both sexes:896   All age groups:896  
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                                                                   
##                           Misc                     Rate    
##  All transmission categories:896   Data not available: 32  
##                                    306.5             :  3  
##                                    487.5             :  3  
##                                    151.9             :  2  
##                                    152.8             :  2  
##                                    154.7             :  2  
##                                    (Other)           :852  
##      Cases         Population   
##  Min.   :  1.0   Min.   :  1.0  
##  1st Qu.:222.8   1st Qu.:210.8  
##  Median :440.5   Median :419.5  
##  Mean   :440.7   Mean   :420.4  
##  3rd Qu.:660.2   3rd Qu.:632.2  
##  Max.   :850.0   Max.   :811.0  
## 
# Per-column overview of the AIDS-deaths table. The "##" lines that follow are
# console output captured from an earlier run.
# NOTE(review): in that output Population spans exactly 1..840 on 840 rows,
# which looks like factor level codes rather than real population counts --
# verify how the column was converted before trusting the numbers.
summary(aids)
##        Indicator        Year               Geography        FIPS      
##  AIDS deaths:840   Min.   :2000   Alabama       : 15   Min.   : 1.00  
##                    1st Qu.:2003   Alaska        : 15   1st Qu.:17.75  
##                    Median :2007   American Samoa: 15   Median :31.50  
##                    Mean   :2007   Arizona       : 15   Mean   :32.54  
##                    3rd Qu.:2011   Arkansas      : 15   3rd Qu.:46.25  
##                    Max.   :2014   California    : 15   Max.   :78.00  
##                                   (Other)       :750                  
##                     Race             Sex     
##  All races/ethnicities:840   Both sexes:840  
##                                              
##                                              
##                                              
##                                              
##                                              
##                                              
##                    Age.group                            Misc    
##  Ages 13 years and older:840   All transmission categories:840  
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##       Rate            Cases         Population   
##  Min.   : 0.000   Min.   :  1.0   Min.   :  1.0  
##  1st Qu.: 1.800   1st Qu.: 85.0   1st Qu.:210.8  
##  Median : 3.600   Median :208.0   Median :420.5  
##  Mean   : 5.801   Mean   :202.3   Mean   :420.5  
##  3rd Qu.: 6.700   3rd Qu.:307.0   3rd Qu.:630.2  
##  Max.   :76.900   Max.   :416.0   Max.   :840.0  
## 
# Tag every row with its disease so the two tables stay distinguishable once
# they are stacked.
chlamydia$Disease <- "Chlamydia"
aids$Disease <- "AIDS"

# Convert a rate column to numeric by value.
#
# The chlamydia Rate column contains the text "Data not available", so it is
# read as factor/character while the AIDS Rate column is numeric. Stacking a
# factor on top of a numeric column with rbind() raises
# "invalid factor level, NA generated" and silently turns every AIDS rate into
# NA. Going through as.character() avoids both that and the factor-level-code
# trap of calling as.numeric() on a factor directly.
#
# x: a numeric, character or factor vector.
# Returns a numeric vector of the same length as x; placeholders become NA.
parse_rate <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  txt <- gsub(",", "", trimws(as.character(x)), fixed = TRUE)
  txt[txt %in% c("", "Data not available")] <- NA_character_
  as.numeric(txt)
}

chlamydia$Rate <- parse_rate(chlamydia$Rate)
aids$Rate <- parse_rate(aids$Rate)

# Stack the two tables; Disease and Year become factors for grouping/plotting.
df.all <- rbind(chlamydia, aids)
df.all$Disease <- as.factor(df.all$Disease)
df.all$Year <- as.factor(df.all$Year)

# Yearly totals per disease: summed population, summed cases, and a rate
# computed as 100 * total cases / total population.
df <- df.all %>%
  group_by(Disease, Year) %>%
  summarise(
    Population = sum(Population),
    Cases = sum(Cases),
    Rate = (100 * sum(Cases)) / sum(Population)
  )

# Total population by year, one line per disease. Year is a factor, so the
# lines need an explicit group aesthetic to connect points across years.
p <- ggplot(df, aes(x = Year, y = Population, col = Disease))
p + geom_line(aes(group = Disease))

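# Discrepancy between the Population totals of the two data sets: the AIDS
# series is almost exactly linear. (Worth checking whether Population holds
# real counts or factor level codes before reading anything into this.)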

# Total population per disease and geography, restricted to years up to 2014.
#
# Year is a factor at this point, so as.numeric(Year) would return the level
# codes (1, 2, ...), which are always <= 2014 and would make the filter a
# no-op. Converting through as.character() compares the actual calendar year.
df <- df.all %>%
  filter(as.numeric(as.character(Year)) <= 2014) %>%
  group_by(Disease, Geography) %>%
  summarise(Population = sum(Population))

# One panel per geography, comparing the summed population of the two tables.
p <- ggplot(df, aes(x = Disease, y = Population))
p + geom_bar(stat = "identity") + facet_wrap(~Geography)

library(trelliscopejs)
## Warning: replacing previous import by 'ggplot2::%+%' when loading
## 'trelliscopejs'
## Warning: replacing previous import by 'ggplot2::facet_wrap' when loading
## 'trelliscopejs'
# Interactive trelliscope display of Rate by Year, one panel per
# Geography/Disease combination, laid out 4 x 6 per page.
# Uses ggplot() + geom_point() instead of the deprecated qplot(); with both x
# and y supplied, qplot() draws points, so the plot itself is unchanged.
ggplot(df.all, aes(x = Year, y = Rate)) +
  geom_point() +
  theme_bw() +
  facet_trelliscope(
    ~ Geography + Disease,
    nrow = 4,
    ncol = 6,
    self_contained = TRUE
  )